# Differential expression on Kallisto data 

# Preliminary samples - 2016 dataset

# Packages and dependence
packageCheckClassic <- function(x){
  # 
  for( i in x ){
    if( ! require( i , character.only = TRUE ) ){
      install.packages( i , dependencies = TRUE )
      require( i , character.only = TRUE )
    }
  }
}

packageCheckClassic(c('DESeq2','adegenet','vsn','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Le chargement a nécessité le package : DESeq2
## Le chargement a nécessité le package : S4Vectors
## Warning: le package 'S4Vectors' a été compilé avec la version R 4.1.3
## Le chargement a nécessité le package : stats4
## Le chargement a nécessité le package : BiocGenerics
## 
## Attachement du package : 'BiocGenerics'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## Les objets suivants sont masqués depuis 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attachement du package : 'S4Vectors'
## Les objets suivants sont masqués depuis 'package:base':
## 
##     expand.grid, I, unname
## Le chargement a nécessité le package : IRanges
## Le chargement a nécessité le package : GenomicRanges
## Le chargement a nécessité le package : GenomeInfoDb
## Le chargement a nécessité le package : SummarizedExperiment
## Le chargement a nécessité le package : MatrixGenerics
## Le chargement a nécessité le package : matrixStats
## 
## Attachement du package : 'MatrixGenerics'
## Les objets suivants sont masqués depuis 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Le chargement a nécessité le package : Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attachement du package : 'Biobase'
## L'objet suivant est masqué depuis 'package:MatrixGenerics':
## 
##     rowMedians
## Les objets suivants sont masqués depuis 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Le chargement a nécessité le package : adegenet
## Le chargement a nécessité le package : ade4
## 
## Attachement du package : 'ade4'
## L'objet suivant est masqué depuis 'package:GenomicRanges':
## 
##     score
## L'objet suivant est masqué depuis 'package:BiocGenerics':
## 
##     score
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## Le chargement a nécessité le package : vsn
## Le chargement a nécessité le package : devtools
## Le chargement a nécessité le package : usethis
## Le chargement a nécessité le package : BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.16'
##   is available with R version '4.2'; see https://bioconductor.org/install
## 
## Attachement du package : 'BiocManager'
## L'objet suivant est masqué depuis 'package:devtools':
## 
##     install
## Le chargement a nécessité le package : ggplot2
## Le chargement a nécessité le package : ggrepel
## Le chargement a nécessité le package : markdown
## Le chargement a nécessité le package : pheatmap
## Le chargement a nécessité le package : RColorBrewer
## Le chargement a nécessité le package : genefilter
## 
## Attachement du package : 'genefilter'
## Les objets suivants sont masqués depuis 'package:MatrixGenerics':
## 
##     rowSds, rowVars
## Les objets suivants sont masqués depuis 'package:matrixStats':
## 
##     rowSds, rowVars
## Le chargement a nécessité le package : gplots
## 
## Attachement du package : 'gplots'
## L'objet suivant est masqué depuis 'package:IRanges':
## 
##     space
## L'objet suivant est masqué depuis 'package:S4Vectors':
## 
##     space
## L'objet suivant est masqué depuis 'package:stats':
## 
##     lowess
## Le chargement a nécessité le package : vegan
## Le chargement a nécessité le package : permute
## 
## Attachement du package : 'permute'
## L'objet suivant est masqué depuis 'package:devtools':
## 
##     check
## Le chargement a nécessité le package : lattice
## This is vegan 2.6-4
## Le chargement a nécessité le package : dplyr
## 
## Attachement du package : 'dplyr'
## L'objet suivant est masqué depuis 'package:Biobase':
## 
##     combine
## L'objet suivant est masqué depuis 'package:matrixStats':
## 
##     count
## Les objets suivants sont masqués depuis 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## L'objet suivant est masqué depuis 'package:GenomeInfoDb':
## 
##     intersect
## Les objets suivants sont masqués depuis 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## Les objets suivants sont masqués depuis 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## Les objets suivants sont masqués depuis 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
## Le chargement a nécessité le package : limma
## Warning: le package 'limma' a été compilé avec la version R 4.1.3
## 
## Attachement du package : 'limma'
## L'objet suivant est masqué depuis 'package:DESeq2':
## 
##     plotMA
## L'objet suivant est masqué depuis 'package:BiocGenerics':
## 
##     plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
##   Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Le chargement a nécessité le package : grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
#candidateGenes<-read.csv('candidateGenes.csv',header=T,sep=',')
samplesSaccharina<-read.table('saccharinaDesignSimple.txt',header=T)
samplesHedophylum<-read.table('hedophylumDesignSimple.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/kelpProject/kallistoOutput'
outputPath<-'/Users/mmeynadier/Documents/kelpProject/DESeq2Output'
#setwd(dataPath)

# DDS object

# If data from kallisto
tx2geneSaccharina<-read.table('Saccharina_tx2gene',header=T)
tx2geneHedophylum<-read.table('Hedophylum_tx2gene',header=T)


# 
# # Data importation - txImport
setwd(dataPath)
filesSaccharina<-paste0(samplesSaccharina$sample)
txiSaccharina<-tximport(files = filesSaccharina,type='kallisto',tx2gene = tx2geneSaccharina)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 
## transcripts missing from tx2gene: 8998
## summarizing abundance
## summarizing counts
## summarizing length
filesHedophylum<-paste0(samplesHedophylum$sample)
txiHedophylum<-tximport(files = filesHedophylum,type='kallisto',tx2gene = tx2geneHedophylum)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 
## transcripts missing from tx2gene: 9442
## summarizing abundance
## summarizing counts
## summarizing length
ddsSaccharina<-DESeqDataSetFromTximport(txiSaccharina,colData=samplesSaccharina,design= ~treatment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
ddsHedophylum<-DESeqDataSetFromTximport(txiHedophylum,colData=samplesHedophylum,design= ~treatment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
# pre-filtering
keepSaccharina <- rowSums(counts(ddsSaccharina)) >= 10 
ddsSaccharina <- ddsSaccharina[keepSaccharina,]
keepHedophylum <- rowSums(counts(ddsHedophylum)) >= 10 
ddsHedophylum <- ddsHedophylum[keepHedophylum,]

# Differential expression analysis
ddsSaccharina<-DESeq(ddsSaccharina)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsSaccharina))
##      [,1]               
## [1,] "Intercept"        
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
ddsHedophylum<-DESeq(ddsHedophylum)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsHedophylum))
##      [,1]               
## [1,] "Intercept"        
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
# Exploring the results - Saccharina

S_C_vs_T1 <- results(ddsSaccharina,contrast=c("treatment", "C", "T1"))
S_C_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "C", "T2"))
S_C_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "C", "T3"))
S_T1_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T1", "T2"))
S_T1_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T1", "T3"))
S_T2_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T2", "T3"))

DESeq2::plotMA(S_C_vs_T1,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(S_C_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(S_C_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(S_T1_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(S_T1_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(S_T2_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdSaccharina <- vst(ddsSaccharina, blind=T)

meanSdPlot(assay(vsdSaccharina))

ntd <- normTransform(ddsSaccharina)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsSaccharina,normalized=TRUE)),
                decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsSaccharina)[,"treatment"])
pheatmap(assay(vsdSaccharina)[select,], cluster_rows=FALSE, show_rownames=F,
         cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdSaccharina, intgroup="treatment", returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed()

sampleDists <- dist(t(assay(vsdSaccharina)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <-vsdSaccharina$treatment
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

# Hedophylum

H_C_vs_T1 <- results(ddsHedophylum,contrast=c("treatment", "C", "T1"))
H_C_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "C", "T2"))
H_C_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "C", "T3"))
H_T1_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T1", "T2"))
H_T1_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T1", "T3"))
H_T2_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T2", "T3"))

DESeq2::plotMA(H_C_vs_T1,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(H_C_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(H_C_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(H_T1_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(H_T1_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(H_T2_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdHedophylum <- vst(ddsHedophylum, blind=T)

meanSdPlot(assay(vsdHedophylum))

ntd <- normTransform(ddsHedophylum)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsHedophylum,normalized=TRUE)),
                decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsHedophylum)[,"treatment"])
pheatmap(assay(vsdHedophylum)[select,], cluster_rows=FALSE, show_rownames=F,
         cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdHedophylum, intgroup="treatment", returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed()

sampleDists <- dist(t(assay(vsdHedophylum)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- vsdHedophylum$treatment
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/fr_FR.UTF-8/fr_FR.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] EnhancedVolcano_1.12.0      ashr_2.2-54                
##  [3] apeglm_1.16.0               tximport_1.22.0            
##  [5] ggvenn_0.1.10               limma_3.50.3               
##  [7] dplyr_1.1.1                 vegan_2.6-4                
##  [9] lattice_0.20-45             permute_0.9-7              
## [11] gplots_3.1.3                genefilter_1.76.0          
## [13] RColorBrewer_1.1-3          pheatmap_1.0.12            
## [15] markdown_1.5                ggrepel_0.9.3              
## [17] ggplot2_3.4.2               BiocManager_1.30.20        
## [19] devtools_2.4.5              usethis_2.1.6              
## [21] vsn_3.62.0                  adegenet_2.1.10            
## [23] ade4_1.7-22                 DESeq2_1.34.0              
## [25] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [27] MatrixGenerics_1.6.0        matrixStats_0.63.0         
## [29] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
## [31] IRanges_2.28.0              S4Vectors_0.32.4           
## [33] BiocGenerics_0.40.0        
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.8             igraph_1.4.1           splines_4.1.2         
##   [4] BiocParallel_1.28.3    digest_0.6.31          invgamma_1.1          
##   [7] htmltools_0.5.5        SQUAREM_2021.1         fansi_1.0.4           
##  [10] magrittr_2.0.3         memoise_2.0.1          cluster_2.1.4         
##  [13] tzdb_0.3.0             remotes_2.4.2          readr_2.1.4           
##  [16] Biostrings_2.62.0      annotate_1.72.0        extrafont_0.19        
##  [19] vroom_1.6.1            extrafontdb_1.0        bdsmatrix_1.3-6       
##  [22] prettyunits_1.1.1      colorspace_2.1-0       blob_1.2.4            
##  [25] xfun_0.38              hexbin_1.28.3          callr_3.7.3           
##  [28] crayon_1.5.2           RCurl_1.98-1.10        jsonlite_1.8.4        
##  [31] survival_3.5-5         ape_5.7-1              glue_1.6.2            
##  [34] gtable_0.3.3           zlibbioc_1.40.0        XVector_0.34.0        
##  [37] seqinr_4.2-23          proj4_1.0-12           DelayedArray_0.20.0   
##  [40] pkgbuild_1.4.0         Rttf2pt1_1.3.12        maps_3.4.1            
##  [43] scales_1.2.1           mvtnorm_1.1-3          DBI_1.1.3             
##  [46] miniUI_0.1.1.1         Rcpp_1.0.10            xtable_1.8-4          
##  [49] emdbook_1.3.12         bit_4.0.5              preprocessCore_1.56.0 
##  [52] truncnorm_1.0-9        profvis_0.3.7          htmlwidgets_1.6.2     
##  [55] httr_1.4.5             ellipsis_0.3.2         farver_2.1.1          
##  [58] urlchecker_1.0.1       pkgconfig_2.0.3        XML_3.99-0.14         
##  [61] sass_0.4.5             locfit_1.5-9.7         utf8_1.2.3            
##  [64] labeling_0.4.2         tidyselect_1.2.0       rlang_1.1.0           
##  [67] reshape2_1.4.4         later_1.3.0            AnnotationDbi_1.56.2  
##  [70] munsell_0.5.0          tools_4.1.2            cachem_1.0.7          
##  [73] cli_3.6.1              generics_0.1.3         RSQLite_2.3.0         
##  [76] evaluate_0.20          stringr_1.5.0          fastmap_1.1.1         
##  [79] yaml_2.3.7             processx_3.8.0         knitr_1.42            
##  [82] bit64_4.0.5            fs_1.6.1               caTools_1.18.2        
##  [85] purrr_1.0.1            KEGGREST_1.34.0        nlme_3.1-162          
##  [88] mime_0.12              ash_1.0-15             ggrastr_1.0.1         
##  [91] compiler_4.1.2         rstudioapi_0.14        beeswarm_0.4.0        
##  [94] curl_5.0.0             png_0.1-8              affyio_1.64.0         
##  [97] tibble_3.2.1           geneplotter_1.72.0     bslib_0.4.2           
## [100] stringi_1.7.12         ps_1.7.3               ggalt_0.4.0           
## [103] Matrix_1.5-1           vctrs_0.6.1            pillar_1.9.0          
## [106] lifecycle_1.0.3        jquerylib_0.1.4        irlba_2.3.5.1         
## [109] bitops_1.0-7           httpuv_1.6.9           R6_2.5.1              
## [112] affy_1.72.0            promises_1.2.0.1       KernSmooth_2.23-20    
## [115] vipor_0.4.5            sessioninfo_1.2.2      MASS_7.3-58.3         
## [118] gtools_3.9.4           pkgload_1.3.2          withr_2.5.0           
## [121] GenomeInfoDbData_1.2.7 hms_1.1.3              mgcv_1.8-42           
## [124] parallel_4.1.2         coda_0.19-4            rmarkdown_2.21        
## [127] mixsqp_0.3-48          bbmle_1.0.25           numDeriv_2016.8-1.1   
## [130] shiny_1.7.4            ggbeeswarm_0.7.1